* Session setup: start from a clean state, then open this file's log.
clear all
* Set the maximum matrix size (matsize) to 10000
set matsize 10000
* Do not pause output with --more-- prompts
set more off
* Close any log that is still open; cap (capture) suppresses the error when none is open
cap log close
* master_dir and log_dir are globals that are not defined in this file, so they must
* already be set when this do-file is run
cd "${master_dir}"
log using "${log_dir}/3-PrimaryAnalysis.log", replace
***************************************************************************************************
* 
* Program: 3-PrimaryAnalysis.do
* Purpose: Main analysis file for voting wait time & race analysis
* Sections:
*     1. Set globals and define programs used throughout
*     2. Methods (Figure 1)
*     3. Overall Wait Times (Figure 2)
*     4. Racial Disparities (Figure 3, Table 1, Figure 4)
*     5. Miscellaneous Statistics Cited Throughout Paper
* Files Used:
*     1. radiusdata.dta
*     2. voterwaittimes.dta
*     3. Pings_all_days.dta
*     4. likelyvoters_rad10.dta - likelyvoters_rad100.dta
*     5. IdentifiedRegularPingers.dta
*     6. block_group_data_2017.dta
*     7. PollingPlaces2016_w_TimeZones_and_Buildings.dta
*     8. cces_comparison.dta
* Files Created:
*     1. 3-PrimaryAnalysis.log
*     2. Figure 1: f1a_unique100m.png, f1b_radius.png
*     3. Figure 2: f2a_overallhistogram.png, f2c_overallarrival.png, f2d_overallwait.png
*     4. Figure 3: f3_disparity.png
*     5. Table 1: t1_main.tex
*     6. Figure 4: f4_robust.png (also produces app_A_t4_robust.tex and f4_robustoutput.dta)
* Notes:
*     Need to ssc install spmap, maptile, estout, gtools, plotplain, unique if not already installed.
*     Figure 1c is produced in 5-PlaceboDayAnalysis.do
*     Figure 2b is produced in ArcGIS
*
***************************************************************************************************

***************************************************************************************************
*  1. Set globals and define programs used throughout
***************************************************************************************************

* Analysis-wide globals
*   measure      - outcome variable summarized by textbox_stats
*   group_decile - decile variable whose 1st and 10th groups textbox_stats compares
*   other_race   - the other race regressors added alongside race_black
*   controls     - polling-area control variables
global measure waittime
global group_decile black_decile
global other_race race_asianpi race_hispanic race_othernonwhite
global controls pov_under_poverty_line pop pop_sqmi

* textbox_stats: summary statistics of the measure variable for graph text boxes.
* Takes no arguments. Reads the globals measure and group_decile.
* Sets these globals, each formatted with %6.0fc:
*   median, mean, sd      - over all observations in memory
*   median_d1, median_d10 - medians where the group_decile variable equals 1 and 10
*   percent               - difference between the two decile medians, as a percent of
*                           the decile-1 median
* The program is declared rclass but does not itself return anything in r().
capture program drop textbox_stats
program define textbox_stats, rclass
    * Full-sample statistics
    summarize $measure, detail
    global median : display %6.0fc `r(p50)'
    global mean : display %6.0fc `r(mean)'
    global sd : display %6.0fc `r(sd)'

    * Medians within the 1st and 10th deciles
    summarize $measure if $group_decile == 1, detail
    local bottom_med = `r(p50)'
    summarize $measure if $group_decile == 10, detail
    local top_med = `r(p50)'

    * Percent gap between the two decile medians, then the formatted medians themselves
    local pct_gap = (`top_med'-`bottom_med')*100/`bottom_med'
    global percent : display %6.0fc `pct_gap'
    global median_d1 : display %6.0fc `bottom_med'
    global median_d10 : display %6.0fc `top_med'
end

***************************************************************************************************
*  2. Methods (Figure 1)
***************************************************************************************************

**********
* Figure 1: Constructing the Sample - Panels A and B [Find Panel C in 5-PlaceboDayAnalysis.do]
**********
* Load the radius data used for Figure 1, Panels A and B
use "$data_dir/radiusdata.dta", clear

* Original bound (100 meter radius)
* separate splits the 100m count into a ...1001 variable (rows where day is 08nov2016) and a
* ...1000 variable (all other rows) so the two sets of bars can be colored differently
separate dailycount_uniqueID_DistLess100, by(day==td(08nov2016))
* n1, n2, n3: mean of the 100m count on 08nov2016, 01nov2016 and 15nov2016
* he, hl, hm: y-positions for the text labels, offset from each of those counts
* dispn1-dispn3: the counts formatted with thousands separators for printing on the graph
sum dailycount_uniqueID_DistLess100 if day == td(08nov2016)
local n1 = r(mean)
local he1 = `n1' + 90000
local he2 = `n1' + 30000 
local dispn1: di %16.0fc `n1'
sum dailycount_uniqueID_DistLess100 if day == td(01nov2016)
local n2 = r(mean)
local hl1 = `n2' + 190000
local hl2 = `n2' + 200000
local dispn2: di %16.0fc `n2'
sum dailycount_uniqueID_DistLess100 if day == td(15nov2016)
local n3 = r(mean)
local hm1 = `n3' + 170000
local hm2 = `n3' + 180000
local dispn3: di %16.0fc `n3'
* Bar chart of the daily count: the 08nov2016 bar in vermillion, the other days in gray,
* using only rows with tag_day == 1. The y-axis tick labels are shown in thousands.
* The x-coordinates in text() are Stata daily-date values (20766 is 08nov2016).
* The global white used below is not defined in this file.
tw (bar dailycount_uniqueID_DistLess1001 day if tag_day == 1, barwidth(1) fcolor(vermillion) ///
  fintensity(50) lcolor(vermillion) legend(off)) (bar dailycount_uniqueID_DistLess1000 day ///
  if tag_day == 1, barwidth(1) fcolor(gray) fintensity(50) lcolor(gray) legend(off)), $white ///
  xtitle("Date in November 2016") ylab(0 "0" 200000 "200" 400000 "400" 600000 "600" 800000 "800" ///
  1000000 "1,000" 1200000 "1,200" 1400000 "1,400" 1600000 "1,600", nogrid format(%16.0gc) ///
  labsize(medium) angle(horizontal)) ytitle("Number of Unique Individuals (Thousands)") ///
  text(`he1' 20766 "Election Day", place(n)) ///
  text(`he2' 20765.55 " ""`dispn1'""", place(n)) ///
  text(`hl1' 20759.8 "Tuesday Before", place(s)) ///
  text(`hl2' 20758.9 " ""`dispn2'""", place(s)) ///
  text(`hm1' 20772.8 "Tuesday After", place(s)) ///
  text(`hm2' 20772.8 " ""`dispn3'""", place(s)) ///
  text(1000000 20763.5 "Weekend", place(n)) ///
  text(700000 20770 "Veteran's" "Day Weekend", place(n)) ///
  tlabel(01nov2016(1)15nov2016, format(%tdDD) labsize(small)) ///
  title("(a) Unique People by Day (within 100 meters of Polling Place)") ///
  saving($result_dir/f1a_unique100m.gph,replace)
graph export "$result_dir/f1a_unique100m.png",  replace 

* Show plateauing of count as one expands size of bound from polling place
* For each radius from 10m to 100m in 10m steps: take the mean daily count over the
* non-Election days, copy that mean onto every row, and compute the Election Day count
* minus that mean.
forvalues i = 10(10)100 {
    * The mean is only filled in on rows where day is not 08nov2016
    gegen avg_dailycount_nonelect`i'tmp = mean(dailycount_uniqueID_DistLess`i') if day != td(08nov2016)
    * Taking the max over all rows spreads that mean onto the Election Day rows too
    gegen avg_dailycount_nonelect`i'= max(avg_dailycount_nonelect`i'tmp)
    drop avg_dailycount_nonelect`i'tmp
    gen electnonelectdiff`i' = dailycount_uniqueID_DistLess`i' - avg_dailycount_nonelect`i' ///
      if day == td(08nov2016)
}

* Keep Election Day only and reshape to long form, with dist10s holding the radius
keep if day == td(08nov2016)
reshape long electnonelectdiff, i(day) j(dist10s)
* Value at the 60m radius, the y-positions for its labels, and its formatted display text
sum electnonelectdiff if dist10s == 60
local n1 = r(mean)
local he1 = `n1' + 40000
local he2 = `n1' + 15000
local dispn1: di %16.0fc `n1'

* Split out the 60m bar (electnonelectdiff1) so it can be highlighted in vermillion
* The global white used below is not defined in this file.
separate electnonelectdiff, by(dist10s == 60)
tw (bar electnonelectdiff1 dist10s, barwidth(10) fcolor(vermillion) fintensity(50) ///
  lcolor(vermillion) legend(off)) (bar electnonelectdiff0 dist10s, barwidth(10) fcolor(gray) ///
  fintensity(50) lcolor(gray) legend(off)), $white xtitle("Distance (Meters)") ylab(0 "0" ///
  100000 "100" 200000 "200" 300000 "300" 400000 "400" 500000 "500" 600000 "600", nogrid ///
  format(%16.0gc) labsize(medium) angle(horizontal)) ///
  ytitle("Number of Unique Individuals (Thousands)") ///
  text(`he1' 61 "Preferred (60m)", place(n)) ///
  text(`he2' 56.5 " ""`dispn1'""", place(n)) ///
  title("(b) Unique People by Distance from Polling Place") xlabel(10(10)100) ///
  saving($result_dir/f1b_radius.gph,replace)
graph export "$result_dir/f1b_radius.png",  replace
* A leftover stop statement used to follow this export. stop is not a Stata command, so it
* ended the do-file with an error here and Sections 3 to 5 never ran. It has been removed.
***************************************************************************************************
*  3. Overall Wait Times (Figure 2)
***************************************************************************************************

**********
* Figure 2a: Overall Wait Time Histogram
**********
* Load the wait time data and keep the analysis sample (rows passing all four filters)
use "${data_dir}/voterwaittimes.dta", clear
keep if likelyvoter_v1 == 1 & enteredpoll == 1 & consistentpinger == 1 & reasonablevalues == 1
* Deciles of race_black; textbox_stats reads this variable through the global group_decile
xtile black_decile = race_black, nquantiles(10)

* textbox_stats sets the globals median, mean and sd that are printed in the text box below
qui textbox_stats
* Histogram of wait time with the y-axis tick labels shown in thousands.
* The global textbox used inside text() is not defined in this file.
histogram waittime, width(1.5) frequency color(vermillion) fintensity(50) ///
  lcolor(vermillion) xtitle("Wait Time in Minutes", size(large)) ///
  ylab(0 "0" 1000 "1" 2000 "2" 3000 "3" 4000 "4" 5000 "5" 6000 "6" 7000 "7" 8000 "8" 9000 "9" ///
  10000 "10" 11000 "11" 12000 "12" 13000 "13", nogrid format(%16.0gc) labsize(medium) ///
  angle(horizontal)) ytitle("Number of Voters (Thousands)", size(large)) ///
  xlab(0(10)120, format(%16.0gc) labsize(medium)) ///
  text(10000 70 "Median:$median" "Mean:   $mean" "StdDev:$sd", $textbox size(huge)) ///
  graphregion(fcolor(white) lcolor(white)) ///
  saving($result_dir/f2a_overallhistogram.gph,replace)
graph export "$result_dir/f2a_overallhistogram.png",  replace 

**********
* Figure 2c & Figure 2d: Waits and Arrivals by Time of Day
**********

* Figure 2c: Volume of Voters by Hour of Arrival
* Uses the filtered data still in memory from Figure 2a. The y-axis tick labels are shown
* in thousands. The global white used below is not defined in this file.
histogram hour_of_arrival, discrete frequency color(vermillion) fintensity(50) lcolor(vermillion) ///
  $white xtitle("Hour of Day", size(large)) xlab(1(1)23, nogrid labsize(medium)) ///
  ytitle("Number of Voters (Thousands)", size(large)) ///
  ylab(0 "0" 1000 "1" 2000 "2" ///
  3000 "3" 4000 "4" 5000 "5" 6000 "6" 7000 "7" 8000 "8" 9000 "9" 10000 "10" 11000 "11" 12000 ///
  "12" 13000 "13" 14000 "14" 15000 "15" 16000 "16" 17000 "17" 18000 "18", ///
  nogrid format(%16.0gc) labsize(medium) angle(horizontal)) ///
  saving($result_dir/f2c_overallarrival.gph,replace)
graph export "$result_dir/f2c_overallarrival.png",  replace 

* Figure 2d: Average Wait Time by Hour of Arrival
* tag_hour marks one row per hour_of_arrival so each bar is drawn once;
* avgwait_by_hour is the mean waittime within each hour_of_arrival
egen tag_hour = tag(hour_of_arrival)
bysort hour_of_arrival: egen avgwait_by_hour = mean(waittime)
* Only hours 6 through 20 are plotted.
* NOTE(review): hour in the command below is presumably a variable abbreviation that
* resolves to hour_of_arrival - confirm that no other variable name begins with hour
tw (bar avgwait_by_hour hour if tag_hour == 1 & hour >= 6 & hour <= 20, barwidth(1) ///
  color(vermillion) fintensity(50) lcolor(vermillion) legend(off)), $white xtitle("Hour of Day", ///
  size(large)) xlab(6(1)20, nogrid labsize(medium)) ytitle("Average Wait Time (Minutes)", ///
  size(large)) ylab(0(2)34, nogrid format(%16.0gc) labsize(medium) angle(horizontal)) ///
  saving($result_dir/f2d_overallwait.gph,replace)
graph export "$result_dir/f2d_overallwait.png",  replace  

***************************************************************************************************
*  4. Racial Disparities (Figure 3, Table 1, Figure 4)
***************************************************************************************************

**********
* Figure 3: PDFs of wait time by race (1st vs. 10th deciles)
**********
* Reload the wait time data and keep the analysis sample (rows passing all four filters)
cd "${master_dir}"
use "${data_dir}/voterwaittimes.dta", clear
keep if likelyvoter_v1 == 1 & enteredpoll == 1 & consistentpinger == 1 & reasonablevalues == 1

* Deciles of race_black, followed by summary statistics per decile and the number of
* distinct polling places in the 1st and 10th deciles
xtile black_decile = race_black, nquantiles(10)
bysort black_decile: sum race_black waittime
unique PollingPlace_ID if black_decile == 1
unique PollingPlace_ID if black_decile == 10
* Kernel densities of waittime: 1st decile as a vermillion line, 10th decile as a dashed
* black line. Note that the graph-wide options (titles, axis labels, text, saving) are
* written inside the parentheses of the second plot.
* The global white used below is not defined in this file.
twoway (kdensity waittime if black_decile == 1, bwidth(1) color("vermillion%100")) ///
       (kdensity waittime if black_decile == 10, bwidth(1) color(black) lpattern(dash) ///
       ytitle("Kernel Density", size(medium)) ylab(,nogrid format(%16.0gc) angle(horizontal)) ///
       xtitle("Wait Time in Minutes", size(medium)) legend(off) xlab(0(10)120, format(%16.0gc)) ///
       $white text(.050 35.5 "Lowest Fraction Black (1st Decile)", color(vermillion)) ///
       text(.017 50.5 "Highest Fraction Black (10th Decile)") ///
       saving($result_dir/f3_disparity.gph,replace))
graph export "$result_dir/f3_disparity.png",  replace 

**********
* Table 1: Main Results
**********

* Table 1 regressions. The same five specifications are estimated for each outcome:
*   Panel A -- OLS: waittime        -> stored as M1 to M5
*   Panel B -- LPM: wait_over_30min -> stored as M6 to M10
* Specifications, in order:
*   1) race_black only
*   2) adds the other race variables
*   3) adds the polling-area controls
*   4) same regressors, absorbing statefips
*   5) same regressors, absorbing statecountyfips
* Standard errors are clustered by PollingPlace_ID in every model. After each fit, the mean
* of the outcome over the estimation sample is attached to the model as scalar DepVarMean.
local m = 0
foreach outcome in waittime wait_over_30min {
    use "${data_dir}/voterwaittimes.dta", clear
    keep if likelyvoter_v1 == 1 & enteredpoll == 1 & consistentpinger == 1 & reasonablevalues == 1

    local ++m
    eststo M`m': regress `outcome' race_black, vce(cluster PollingPlace_ID)
    summarize `outcome' if e(sample), detail
    estadd scalar DepVarMean = r(mean)

    local ++m
    eststo M`m': regress `outcome' race_black $other_race, vce(cluster PollingPlace_ID)
    summarize `outcome' if e(sample), detail
    estadd scalar DepVarMean = r(mean)

    local ++m
    eststo M`m': regress `outcome' race_black $other_race $controls, vce(cluster PollingPlace_ID)
    summarize `outcome' if e(sample), detail
    estadd scalar DepVarMean = r(mean)

    local ++m
    eststo M`m': areg `outcome' race_black $other_race $controls, absorb(statefips) ///
      vce(cluster PollingPlace_ID)
    summarize `outcome' if e(sample), detail
    estadd scalar DepVarMean = r(mean)

    local ++m
    eststo M`m': areg `outcome' race_black $other_race $controls, absorb(statecountyfips) ///
      vce(cluster PollingPlace_ID)
    summarize `outcome' if e(sample), detail
    estadd scalar DepVarMean = r(mean)
}
 
* Write Table 1 to t1_main.tex. The first esttab call (M1 to M5, Panel A) replaces the file
* and opens the LaTeX tabular in its prehead. The second call (M6 to M10, Panel B) appends
* to the same file and closes the tabular in its postfoot. The command delimiter is switched
* to a semicolon so each call can span several lines, and switched back afterwards.
#d ;
esttab M1 M2 M3 M4 M5 using $result_dir/t1_main.tex, replace 
  keep(race_black race_asianpi race_hispanic race_othernonwhite) 
  cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) star(* 0.10 ** 0.05 *** 0.01) 
  stat(N r2 DepVarMean, fmt(%16.0gc 2 2) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum nomtitles collabels(none)
  prehead(`"{"' `"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
  `"\begin{tabular}{l*{5}{c}}"'`"\toprule"' `"&\multicolumn{1}{c}{(1)}&\multicolumn{1}{c}{(2)}
  &\multicolumn{1}{c}{(3)}&\multicolumn{1}{c}{(4)}&\multicolumn{1}{c}{(5)}\\"'
  `"\multicolumn{5}{l}{\textbf{Panel A: Ordinary Least Squares (Y = Wait Time)}} \\"' `"\hline"')
  postfoot(`"Polling Area Controls? &No&No&Yes&Yes&Yes \\"'
  `"State FE?                       &No&No&No&Yes&Yes \\"'
  `"County FE?                       &No&No&No&No&Yes \\"'
  `"\hline"') ;
esttab M6 M7 M8 M9 M10 using $result_dir/t1_main.tex, append
  keep(race_black race_asianpi race_hispanic race_othernonwhite) 
  cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) star(* 0.10 ** 0.05 *** 0.01) 
  stat(N r2 DepVarMean, fmt(%16.0gc 2 2) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum nomtitles collabels(none) 
  prehead(
  `"\multicolumn{5}{l}{\textbf{Panel B: Linear Probability Model (Y = Wait Time $>$ 30min)}} \\"'
  `"\hline"') postfoot(`"Polling Area Controls? &No&No&Yes&Yes&Yes \\"'
  `"State FE?                       &No&No&No&Yes&Yes \\"'
  `"County FE?                       &No&No&No&No&Yes \\"'
  `"\hline\hline"' `"\bottomrule"' 
  `"\multicolumn{6}{l}{\footnotesize \sym{*} \(p<0.10\), 
  \sym{**} \(p<0.05\), \sym{***} \(p<0.01\)} \\"' `"\end{tabular}"' `"}"') ;
#d cr


**********
* Figure 4: Robustness (and Appendix Table A4)
**********

* [PART 1] Construct Data/Tables Used for Analysis:
* Start from the analysis sample (rows passing all four filters)
use "${data_dir}/voterwaittimes.dta", clear
keep if likelyvoter_v1 == 1 & enteredpoll == 1 & consistentpinger == 1 & reasonablevalues == 1

* The robustness checks come in three families. Each one regresses a wait time measure on
* race_black and stores the coefficient and standard error in 1-row matrices, which are
* later turned into a dataset and graphed:
*   A) wait time taken at different points between the lower and upper bound
*   B) different "reasonable values" filter definitions (the widest is 5 hours, etc.)
*   C) wait time measures based on radii other than 60m

* [A] Wait time at 0%, 10%, ..., 100% of the way from lowerbound to upperbound
forvalues step = 0/10 {
    generate boundsplit`step' = lowerbound + (`step'*(upperbound - lowerbound))/10
}
foreach stat in b se {
    matrix define boundsplit_`stat' = J(1,11,.)
}

forvalues step = 0/10 {
    * Matrix columns run from 1 to 11 while the split index runs from 0 to 10
    local col = `step'+1
    display "Wait time measure: Lower Bound + `step'0% of Diff between Lower and Upper Bound"
    quietly regress boundsplit`step' race_black, vce(cluster PollingPlace_ID)
    display _b[race_black]
    quietly estimates store BS`step'
    quietly matrix boundsplit_b[1,`col'] = _b[race_black]
    quietly matrix boundsplit_se[1,`col'] = _se[race_black]
    * Mean of the dependent variable over the estimation sample, for the appendix table
    summarize boundsplit`step' if e(sample)
    quietly estadd scalar DepVarMean = r(mean)
}

* [B] Loop over reasonable values definitions (Under 5hours,...,Under 1 hour,...Btwn 4min to 1hour)
* Definition k keeps rows whose upperbound is non-missing, above the k-th lower cutoff
* (in minutes) and below 60 times the k-th upper cutoff (in hours).
use "${data_dir}/voterwaittimes.dta", clear
local lower_minutes 1 1 1 1 1.5 2 2 2.5 3 4
local upper_hours   5 4 3 2 2   2 1 1   1 1
forvalues k = 1/10 {
    local lo : word `k' of `lower_minutes'
    local hi : word `k' of `upper_hours'
    generate reasonablevalues`k' = ((upperbound > `lo' & upperbound != .) & (upperbound < (60*`hi')))
}
foreach stat in b se {
    matrix define reasonable_`stat' = J(1,10,.)
}

forvalues k = 1/10 {
    * The sample changes with each definition, so the full data are restored after each fit
    preserve
    display "Reasonable values measure `k'"
    quietly keep if likelyvoter_v1 == 1 & enteredpoll == 1 & consistentpinger == 1 ///
      & reasonablevalues`k' == 1
    quietly regress waittime race_black, vce(cluster PollingPlace_ID)
    display _b[race_black]
    quietly estimates store RV`k'
    quietly matrix reasonable_b[1,`k'] = _b[race_black]
    quietly matrix reasonable_se[1,`k'] = _se[race_black]
    * Mean of the dependent variable over the estimation sample, for the appendix table
    summarize waittime if e(sample)
    quietly estadd scalar DepVarMean = r(mean)
    restore
}

/* [C] Construct radii waittime measures (10 to 100m)
for X in any b se: matrix define radius_X = J(1,10,.)
local j = 0
foreach X in 10 20 30 40 50 60 70 80 90 100 {
    qui local j = `j'+1
    disp "Radius = `X'm"
    qui use ID_11 PollingPlace_ID gisjoin Ping_in_ConvexHull local_date_sec ///
      Dist_to_PollingPlace_M day Sec_Since_Last Sec_Till_Next_Ping statefips statecountyfips ///
      if day == td(08nov2016) & Dist_to_PollingPlace_M <= `X'  ///
      using "${data_dir}/Pings_all_days.dta", clear
      
    qui merge m:1 ID_11_16 using "$data_dir/likelyvoters_rad`X'.dta", keepusing(likelyvoter_v1_d08)
    qui drop if _merge == 2
    qui replace likelyvoter_v1_d08 = 0 if likelyvoter_v1_d08 == .
    qui drop _merge
    qui rename likelyvoter_v1_d08 likelyvoter_v1
    
    * Merge in "Regular Pinger" Person Info (# of unique hours pinged during Election Day)
    qui merge m:1 ID_11_16 using "$data_dir/IdentifiedRegularPingers.dta"
    qui drop if _merge == 2
    qui drop _merge
    
    * Generate "Consistent Pinger" filter (more than median number of hours in full data, i.e. >=12)
    qui gen consistentpinger = (uniquepinghours >= 12 & uniquepinghours != .)
    
    * Generate "Entered Polling Place" filter (did any ping on Election Day ever enter convex hull 
    * of building)
    qui gegen enteredpoll = max(Ping_in_ConvexHull), by(ID_11_16 PollingPlace_ID)
    
    * Construct waittime variables
    qui gegen double earliestping = min(local_date_sec), by(ID_11 PollingPlace_ID)
    qui gegen double latestping = max(local_date_sec), by(ID_11 PollingPlace_ID)
    qui gen double lowerbound = (latestping - earliestping)/1000
    
    qui gen double sec_before_earliest_ping1 = Sec_Since_Last if earliestping == local_date_sec
    qui gegen double sec_before_earliest_ping = max(sec_before_earliest_ping1), ///
	  by(ID_11 PollingPlace_ID)
    qui drop sec_before_earliest_ping1
    
    qui gen double sec_after_latest_ping1 = Sec_Till_Next_Ping if latestping == local_date_sec
    qui gegen double sec_after_latest_ping = max(sec_after_latest_ping1), by(ID_11 PollingPlace_ID)
    qui drop sec_after_latest_ping1
    
    qui gen double upperbound = lowerbound + sec_before_earliest_ping + sec_after_latest_ping
    qui replace upperbound = upperbound / 60
    qui replace lowerbound = lowerbound / 60
    qui gen waittime = lowerbound + (upperbound - lowerbound)/2

    * Generate "Reasonable Values" Filter (More than 1 minute & less than 2 hours)
    qui gen reasonablevalues = ((upperbound > 1 & upperbound != .) & (upperbound < (60*2)))

    * Hour of arrival on Election Day (hour of earliest ping in the 60m radius)
    qui gen hour_of_arrival = hh(earliestping)

    * Merge in Census Demographic Data (by polling place's block group)
    qui sort gisjoin
    qui merge m:1 gisjoin using "$raw_dir/block_group_data_2017.dta"
    qui drop if _merge == 2
    qui drop _merge
	
	* Create ID tag
	qui gsort -upperbound
	qui gegen tag_ID = tag(ID)
    qui keep if tag_ID == 1
    
    * Keep only filtered voters
    qui keep if likelyvoter_v1 == 1 & enteredpoll == 1 & consistentpinger == 1 ///
      & reasonablevalues == 1
    qui reg waittime race_black, cl(PollingPlace_ID)
    
    qui reg waittime race_black, cl(PollingPlace_ID)
    disp _b[race_black]
    qui estimates store Rad`X'
    qui matrix radius_b[1,`j'] = _b[race_black]
    qui matrix radius_se[1,`j'] = _se[race_black]
    sum waittime if e(sample)
    qui estadd scalar DepVarMean = r(mean)
}
*/

* Write Appendix Table A4 to app_A_t4_robust.tex. Panel A (BS0 to BS10) replaces the file and
* opens the LaTeX tabular in its prehead. Panel B (RV1 to RV10) is appended.
* The footer that closes the table (bottomrule, significance note, end of tabular and the
* closing brace) used to be written only by the Panel C call, which is commented out in this
* file, so the .tex file was left unterminated. Panel B now writes that footer.
* If Panel C is ever re-enabled, change the Panel B postfoot back to a single hline so that
* the table is not closed twice.
* The command delimiter is switched to a semicolon so each call can span several lines.
#d ;
esttab BS0 BS1 BS2 BS3 BS4 BS5 BS6 BS7 BS8 BS9 BS10 using $result_dir/app_A_t4_robust.tex, replace 
  keep(race_black) cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) 
  star(* 0.10 ** 0.05 *** 0.01) stat(N r2 DepVarMean, fmt(%16.0gc 2 2) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum mtitle(Lower S1 S2 S3 S4 Midpoint S6 S7 
  S8 S9 Upper) coeflabel(race_black "Fraction Black") collabels(none)
  prehead(`"{"' `"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
  `"\begin{tabular}{l*{11}{c}}"'`"\toprule"' `"&\multicolumn{1}{c}{(1)}&\multicolumn{1}{c}{(2)}
  &\multicolumn{1}{c}{(3)}&\multicolumn{1}{c}{(4)}&\multicolumn{1}{c}{(5)}
  &\multicolumn{1}{c}{(6)}&\multicolumn{1}{c}{(7)}&\multicolumn{1}{c}{(8)}
  &\multicolumn{1}{c}{(9)}&\multicolumn{1}{c}{(10)}&\multicolumn{1}{c}{(11)}\\"'
  `"\multicolumn{11}{l}{\textbf{Panel A: Lower to Upper Bound Split (10\% increments)}} \\"' 
  `"\hline"') postfoot(`"\hline"') ;
esttab RV1 RV2 RV3 RV4 RV5 RV6 RV7 RV8 RV9 RV10 using $result_dir/app_A_t4_robust.tex, append
  keep(race_black) cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) 
  star(* 0.10 ** 0.05 *** 0.01) stat(N r2 DepVarMean, fmt(%16.0gc 2 2) label("N" "\$R^2$")) 
  noabbrev label gaps varwidth(30) booktabs nodep nonum mtitle(RV1 RV2 RV3 RV4 RV5 RV6 RV7 RV8 
  RV9 RV10) coeflabel(race_black "Fraction Black") collabels(none) 
  prehead(`"\multicolumn{11}{l}{\textbf{Panel B: Reasonable Values (See Notes)}} \\"'
  `"\hline"') postfoot(`"\hline\hline"' `"\bottomrule"' 
  `"\multicolumn{12}{l}{\footnotesize \sym{*} \(p<0.10\), 
  \sym{**} \(p<0.05\), \sym{***} \(p<0.01\)} \\"' `"\end{tabular}"' `"}"') ;
#d cr
/*
esttab Rad10 Rad20 Rad30 Rad40 Rad50 Rad60 Rad70 Rad80 Rad90 Rad100 using 
  $result_dir/app_A_t4_robust.tex, append keep(race_black) cells(b(star fmt(2)) se(par fmt(2))) 
  legend sty(fixed) star(* 0.10 ** 0.05 *** 0.01) stat(N r2 DepVarMean, fmt(%16.0gc 2 2) 
  label("N" "\$R^2$")) noabbrev label gaps varwidth(30) booktabs nodep nonum mtitle(Rad10 Rad20 
  Rad30 Rad40 Rad50 Rad60 Rad70 Rad80 Rad90 Rad100) coeflabel(race_black "Fraction Black") 
  collabels(none) 
  prehead(`"\multicolumn{11}{l}{\textbf{Panel C: Radius Around Building (10 to 100 meters)}} \\"'
  `"\hline"') postfoot(`"\hline\hline"' `"\bottomrule"' 
  `"\multicolumn{12}{l}{\footnotesize \sym{*} \(p<0.10\), 
  \sym{**} \(p<0.05\), \sym{***} \(p<0.01\)} \\"' `"\end{tabular}"' `"}"') ;
*/

* Build an 11-row dataset holding, for each robustness family, the race_black coefficient
* and its bounds at plus and minus 1.96 standard errors, read from the matrices filled in
* above. BS indexes the bound splits (0 to 10); RV and Rad index rows 1 to 11.
clear
set obs 11
generate BS=_n-1
generate RV=_n
generate Rad=_n
foreach family in BS RV Rad {
    foreach stat in coeff cil ciu {
        generate race_black_`family'_`stat' = .
    }
}

* Bound splits: matrix column col holds split number col minus 1
forvalues col = 1/11 {
    local split = `col' - 1
    replace race_black_BS_coeff = boundsplit_b[1,`col'] if BS == `split'
    replace race_black_BS_cil = boundsplit_b[1,`col'] - 1.96*boundsplit_se[1,`col'] ///
      if BS == `split'
    replace race_black_BS_ciu = boundsplit_b[1,`col'] + 1.96*boundsplit_se[1,`col'] ///
      if BS == `split'
}

* Reasonable values definitions 1 to 10 (row 11 stays missing)
forvalues k = 1/10 {
    replace race_black_RV_coeff = reasonable_b[1,`k'] if RV == `k'
    replace race_black_RV_cil = reasonable_b[1,`k'] - 1.96*reasonable_se[1,`k'] ///
      if RV == `k'
    replace race_black_RV_ciu = reasonable_b[1,`k'] + 1.96*reasonable_se[1,`k'] ///
      if RV == `k'
}
/*
forvalues i = 1/10 {
    replace race_black_Rad_coeff = radius_b[1,`i'] if Rad == `i'
    replace race_black_Rad_cil = radius_b[1,`i'] - 1.96*radius_se[1,`i'] ///
      if Rad == `i'
    replace race_black_Rad_ciu = radius_b[1,`i'] + 1.96*radius_se[1,`i'] ///
      if Rad == `i'
}
replace Rad = Rad*10  
*/
save "$data_dir/f4_robustoutput.dta", replace

* [PART 2] Robustness Figure:
* Reload the dataset saved at the end of Part 1. clear is the documented option of use for
* discarding the data in memory; the earlier replace option is not documented for use.
use "$data_dir/f4_robustoutput.dta", clear

* Panel A: Bound Split
* The coefficient at BS == 5 is stored so it can be drawn as the red reference line
sum race_black_BS_coeff if BS == 5
local primary_coeff = r(mean)

* Points are the coefficients for each split; the horizontal caps run from the cil to the
* ciu bound. The global white used below is not defined in this file.
tw (scatter BS race_black_BS_coeff, mcolor(blue) ///
  msize(medlarge)) (rcap race_black_BS_cil race_black_BS_ciu BS, horizontal ///
  lcolor(ltblue) legend(off)), $white title("(a) Wait Time Measure Splits", size(large)) ///
  xlab(0(1)8, nogrid valuelabel labsize(large) format(%16.0gc) angle(horizontal)) ///
  ylab(0(1)10.5, nogrid valuelabel labsize(large) angle(horizontal)) ///
  xline(`primary_coeff', lpattern(solid) lcolor(red)) /// 
  ytitle("Lower to Upper Bound Split (10% increments)", size(large)) ///
  saving($result_dir/f4a_boundsplit.gph,replace)
graph export $result_dir/f4a_boundsplit.png, replace

* Panel B: Reasonable Values
* The coefficient at RV == 4 is stored so it can be drawn as the red reference line
sum race_black_RV_coeff if RV == 4
local primary_coeff = r(mean)
* Points are the coefficients for each definition; the horizontal caps run from the cil to
* the ciu bound. The global white used below is not defined in this file.
tw (scatter RV race_black_RV_coeff, mcolor(blue) ///
  msize(medlarge)) (rcap race_black_RV_cil race_black_RV_ciu RV, horizontal ///
  lcolor(ltblue) legend(off)), $white ///
  title("(b) Reasonable Values Definitions", size(large)) ///
  xlab(0(1)8, nogrid valuelabel labsize(large) format(%16.0gc) angle(horizontal)) ///
  ylab(1(1)10, nogrid valuelabel labsize(large) angle(horizontal)) ///
  xline(`primary_coeff', lpattern(solid) lcolor(red)) ///
  ytitle("Reasonable Values (See Notes)", size(large)) ///
  saving($result_dir/f4b_reasonable.gph,replace)
graph export $result_dir/f4b_reasonable.png, replace

/* Panel C: Radius
sum race_black_Rad_coeff if Rad == 60
local primary_coeff = r(mean)
tw (scatter Rad race_black_Rad_coeff, mcolor(blue) ///
  msize(medlarge)) (rcap race_black_Rad_cil race_black_Rad_ciu Rad, horizontal ///
  lcolor(ltblue) legend(off)), $white ///
  title("(c) Radius Around Building", size(large)) ///
  xlab(1(1)8, nogrid valuelabel labsize(large) format(%16.0gc) angle(horizontal)) ///
  ylab(10(10)100, nogrid valuelabel labsize(large) angle(horizontal)) ///
  xline(`primary_coeff', lpattern(solid) lcolor(red)) ///
  ytitle("Radius (10 to 100 meters)", size(large)) ///
  saving($result_dir/f4c_radius.gph,replace)
graph export $result_dir/f4c_radius.png, replace
*/

* Combine the saved panels into Figure 4 and export it. Only Panels A and B are combined
* because the Panel C code in this file is commented out.
* The three-panel call is kept on the next line for reference. It must NOT end with the
* line-join marker (three slashes): a star comment that ends with one is joined to the
* lines that follow, which previously turned the live graph combine command below into
* part of the comment, so f4_robust.gph was never rebuilt.
* graph combine "$result_dir/f4a_boundsplit" "$result_dir/f4b_reasonable" "$result_dir/f4c_radius",
graph combine "$result_dir/f4a_boundsplit" "$result_dir/f4b_reasonable", ///
  graphregion(fcolor(white) lcolor(white)) imargin(20) iscale(* .6) cols(3) rows(1) ///
  b1("Coefficients on 'Fraction Black' from Separate Regressions", size(small)) ///
  colfirst saving($result_dir/f4_robust.gph, replace)
graph use $result_dir/f4_robust.gph
graph export $result_dir/f4_robust.png, replace

***************************************************************************************************
*  5. Miscellaneous Statistics Cited Throughout Paper
***************************************************************************************************

* Abstract: wait time and share waiting over 30 minutes in the analysis sample
use "${data_dir}/voterwaittimes.dta", clear
keep if likelyvoter_v1 == 1 & enteredpoll == 1 & consistentpinger == 1 & reasonablevalues == 1
summarize waittime wait_over_30min

* Abstract: how much longer is the wait compared to "entirely white neighborhoods"?
*           Each outcome is regressed on all racial groups and the coefficient on race_black
*           is divided by the constant, i.e. the linear projection of the extra wait in an
*           entirely black area relative to the mean in an entirely white one.
foreach outcome in waittime wait_over_30min {
    regress `outcome' race_black $other_race, vce(cluster PollingPlace_ID)
    display _b[race_black]/_b[_cons]
}

* Intro: number of polling places in the raw data
use "${raw_dir}/PollingPlaces2016_w_TimeZones_and_Buildings", clear
unique PollingPlace_ID

* Intro: number of polling places in the merged file, and again after the filters are applied
use "${data_dir}/voterwaittimes.dta", clear
unique PollingPlace_ID
keep if likelyvoter_v1 == 1 & enteredpoll == 1 & consistentpinger == 1 & reasonablevalues == 1
unique PollingPlace_ID
* r(sum) is left behind by the unique call just above.
* NOTE(review): 116900 is presumably the total number of polling places - confirm
display `r(sum)'/116900

* Data & Methods: mean and median wait time in the final analysis sample
summarize waittime wait_over_30min, detail

* Racial Disparities: the same statistics for the bottom and top deciles of fraction black
xtile black_decile = race_black, nquantiles(10)
foreach d in 1 10 {
    summarize race_black waittime wait_over_30min if black_decile == `d'
}

* Racial Disparities: number of voters and polling places in those two deciles
foreach d in 1 10 {
    unique PollingPlace_ID if black_decile == `d'
}

* Correlation in CCES and smartphone coefficients
use "${data_dir}/cces_comparison.dta", clear
correlate state_coef_ebayes state_cces_coef_ebayes if tag_state == 1
correlate cd_coef_ebayes cd_cces_coef_ebayes

log close


